This document contains all of our analyses.
# Load the raw game data and tidy two comma-delimited columns:
#   * release_date is split at the comma into month_day and year
#     (year stays a character column here; later steps convert it)
#   * genre keeps only the text before its first comma
game_df <- read_csv("./data/game_2.csv") %>%
  separate(release_date, into = c("month_day", "year"), sep = ",") %>%
  # extra = "drop" states explicitly that any further genre pieces are
  # discarded, instead of relying on the default drop-with-a-warning.
  separate(genre, into = "genre", sep = ",", extra = "drop")
# Interactive scatter of meta_score (x) against user_score (y).
# Hovering over a point shows the game's title and publisher.
game_df %>%
  mutate(text_label = str_c("Title: ", title, "\nPublisher: ", publisher)) %>%
  plot_ly(
    x = ~meta_score,
    y = ~user_score,
    type = "scatter",
    # Set the mode explicitly; otherwise plotly falls back to markers and
    # prints a "No scatter mode specified" message on every render.
    mode = "markers",
    text = ~text_label
  )
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# meta_score vs. user_score with a fitted regression line, one facet per
# genre. Only genres with more than 3 games are kept.
game_df %>%
  group_by(genre) %>%
  filter(n() > 3) %>%
  # Drop the grouping once the size filter is done so the plotting
  # functions receive a plain (ungrouped) data frame.
  ungroup() %>%
  ggscatter(x = "meta_score", y = "user_score", add = "reg.line") +
  facet_wrap(~genre) +
  # Correlation label and regression equation are placed at fixed
  # coordinates in every facet.
  stat_cor(label.x = 3, label.y = 11) +
  stat_regline_equation(label.x = 3, label.y = 10)
## `geom_smooth()` using formula 'y ~ x'
# meta_score vs. user_score with a fitted regression line, one facet per
# platform. Only platforms with more than 3 games are kept.
game_df %>%
  group_by(platform) %>%
  filter(n() > 3) %>%
  # Drop the grouping once the size filter is done so the plotting
  # functions receive a plain (ungrouped) data frame.
  ungroup() %>%
  ggscatter(x = "meta_score", y = "user_score", add = "reg.line") +
  facet_wrap(~platform) +
  # Correlation label and regression equation are placed at fixed
  # coordinates in every facet.
  stat_cor(label.x = 3, label.y = 11) +
  stat_regline_equation(label.x = 3, label.y = 10)
## `geom_smooth()` using formula 'y ~ x'
# Correlation matrix of the three numeric measures. Only meta_score,
# user_score and total_sale are passed on, with user_score coerced to
# numeric first.
# NOTE(review): rquery.cormat() is not defined in the visible code --
# presumably sourced from an external helper script; confirm.
game_df %>%
  transmute(
    meta_score,
    user_score = as.numeric(user_score),
    total_sale
  ) %>%
  rquery.cormat()
## $r
## total_sale meta_score user_score
## total_sale 1
## meta_score 0.3 1
## user_score 0.073 0.52 1
##
## $p
## total_sale meta_score user_score
## total_sale 0
## meta_score 1.8e-45 0
## user_score 0.00089 4.1e-141 0
##
## $sym
## total_sale meta_score user_score
## total_sale 1
## meta_score 1
## user_score . 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
# Box plot of total_sale per genre. Each genre label carries its game
# count, e.g. "<genre> (n=12)", and genres are ordered by total_sale via
# fct_reorder().
game_df %>%
  group_by(genre) %>%
  mutate(count = n()) %>%
  ungroup() %>%
  mutate(
    # Build the label only after ungrouping: a grouping column cannot be
    # rewritten inside a grouped mutate().
    genre = str_c(genre, " (n=", count, ")"),
    genre = fct_reorder(genre, total_sale),
    text_label = str_c("Title: ", title, "\nPublisher: ", publisher)
  ) %>%
  plot_ly(
    x = ~total_sale,
    color = ~genre,
    type = "box",
    colors = "viridis",
    text = ~text_label
  )
Let’s define popular games as those with total sales of at least 3.5 (`total_sale >= 3.5`).
# Bar chart: number of popular games (total_sale >= 3.5) released in each
# multi-year period.
game_df %>%
  filter(total_sale >= 3.5) %>%
  mutate(
    year = as.numeric(year),
    # Compare year as a number. Comparing against quoted literals coerces
    # year to character and only works while every year has four digits.
    # Years outside 1996-2018 (or missing) get an NA period.
    period = case_when(
      between(year, 1996, 1999) ~ "1996-1999",
      between(year, 2000, 2003) ~ "2000-2003",
      between(year, 2004, 2006) ~ "2004-2006",
      between(year, 2007, 2009) ~ "2007-2009",
      between(year, 2010, 2012) ~ "2010-2012",
      between(year, 2013, 2015) ~ "2013-2015",
      between(year, 2016, 2018) ~ "2016-2018"
    ),
    period = as.factor(period)
  ) %>%
  # One row per period with its game count; the result is ungrouped.
  count(period, name = "number") %>%
  plot_ly(x = ~period, y = ~number, type = "bar", colors = "viridis")
# Scatter of individual popular games (total_sale >= 3.5): release period
# on x, total_sale on y, one colour per title. Hovering shows the title
# and its total_sale.
game_df %>%
  filter(total_sale >= 3.5) %>%
  mutate(
    year = as.numeric(year),
    # Compare year as a number. Comparing against quoted literals coerces
    # year to character and only works while every year has four digits.
    # Years outside 1996-2018 (or missing) get an NA period.
    period = case_when(
      between(year, 1996, 1999) ~ "1996-1999",
      between(year, 2000, 2003) ~ "2000-2003",
      between(year, 2004, 2006) ~ "2004-2006",
      between(year, 2007, 2009) ~ "2007-2009",
      between(year, 2010, 2012) ~ "2010-2012",
      between(year, 2013, 2015) ~ "2013-2015",
      between(year, 2016, 2018) ~ "2016-2018"
    ),
    period = as.factor(period),
    text_label = str_c("Title: ", title, " Total_sale:", total_sale)
  ) %>%
  plot_ly(
    x = ~period,
    y = ~total_sale,
    type = "scatter",
    # Explicit mode avoids plotly's "No scatter mode specified" message.
    mode = "markers",
    colors = "viridis",
    color = ~title,
    text = ~text_label,
    # Legend is hidden; FALSE is spelled out because F can be reassigned.
    showlegend = FALSE
  )